#!/usr/bin/env vpython3
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Script for finding and suppressing flaky GPU tests.

This relies on ResultDB BigQuery data under the hood, so it requires the `bq`
tool which is available as part of the Google Cloud SDK
https://cloud.google.com/sdk/docs/quickstarts.

Example usage, which finds all failures in the past 5 days. Any tests that
failed more than twice on a configuration is marked as flaky, and any that
failed more than 5 times is marked as failing:

suppress_flakes.py \
  --project chrome-unexpected-pass-data \
  --sample-period 5
"""

import argparse

from flake_suppressor import expectations
from flake_suppressor import queries
from flake_suppressor import result_output
from flake_suppressor import results as results_module


def ParseArgs():
  # TODO(crbug.com/1192733): Add flaky and failure thresholds, likely in the
  # form of % of failures out of the total runs for a (test, tags) combination.
  # <1% can be ignored, > 50% can be treated as a failure instead of a flake.
  parser = argparse.ArgumentParser(
      description=('Script for automatically suppressing flaky/failing GPU '
                   'Telemetry-based tests.'))
  parser.add_argument('--project',
                      required=True,
                      help=('The billing project to use for BigQuery queries. '
                            'Must have access to the ResultDB BQ tables, e.g. '
                            '"chrome-luci-data.chromium.gpu_ci_test_results".'))
  parser.add_argument('--sample-period',
                      type=int,
                      default=1,
                      help=('The number of days to sample data from.'))
  parser.add_argument('--no-group-by-tags',
                      action='store_false',
                      default=True,
                      dest='group_by_tags',
                      help=('Append added expectations to the end of the file '
                            'instead of attempting to automatically group with '
                            'similar expectations.'))
  parser.add_argument('--no-prompt-for-user-input',
                      action='store_false',
                      default=True,
                      dest='prompt_for_user_input',
                      help=('Generate expectations automatically based on '
                            'thresholds instead of prompting the user each '
                            'time. The user will still need to add associated '
                            'bugs to generated expectations afterwards.'))
  parser.add_argument('--ignore-threshold',
                      type=float,
                      default=0.01,
                      help=('The fraction of failed tests under which flakes '
                            'will be ignored instead of having an expectation '
                            'added when --no-prompt-for-user-input is used.'))
  parser.add_argument('--flaky-threshold',
                      type=float,
                      default=0.5,
                      help=('The fraction of failed tests under which flakes '
                            'will be marked as RetryOnFailure when '
                            '--no-prompt-for-user-input is used. Above this, '
                            'failures will be marked as Failure.'))
  parser.add_argument('--include-all-tags',
                      action='store_true',
                      default=False,
                      help=('Use all tags generated by a configuration when '
                            'creating an expectation rather than attempting '
                            'to only use the most specific one. This should '
                            'only need to be passed if the tags in the '
                            'expectation files are not ordered from least '
                            'specific to most specific.'))
  args = parser.parse_args()

  if not args.prompt_for_user_input:
    if args.ignore_threshold < 0:
      raise ValueError('--ignore-threshold must be positive')
    if args.flaky_threshold < 0:
      raise ValueError('--flaky-threshold must be positive')
    if args.flaky_threshold <= args.ignore_threshold:
      raise ValueError(
          '--flaky-threshold must be greater than --ignore-threshold')

  return args


def main():
  args = ParseArgs()
  expectations.AssertCheckoutIsUpToDate()
  querier_instance = queries.BigQueryQuerier(args.sample_period, args.project)
  results = querier_instance.GetFlakyOrFailingCiTests()
  results.extend(querier_instance.GetFlakyOrFailingTryTests())
  aggregated_results = results_module.AggregateResults(results)
  result_output.GenerateHtmlOutputFile(aggregated_results)
  print('If there are many instances of failed tests, that may be indicative '
        'of an issue that should be handled in some other way, e.g. reverting '
        'a bad CL.')
  if args.prompt_for_user_input:
    input('\nBeginning of user input section - press any key to continue')
    expectations.IterateThroughResultsForUser(aggregated_results,
                                              args.group_by_tags,
                                              args.include_all_tags)
  else:
    result_counts = querier_instance.GetResultCounts()
    expectations.IterateThroughResultsWithThresholds(
        aggregated_results, args.group_by_tags, result_counts,
        args.ignore_threshold, args.flaky_threshold, args.include_all_tags)
    print('\nGenerated expectations will need to have bugs manually added.')
  print('\nGenerated expectations likely contain conflicting tags that need to '
        'be removed.')


if __name__ == '__main__':
  main()
